set.seed(1)

addTimeStamp <- function(saveInfo, tstart, info){
  timeDiff <- abs(round(difftime(Sys.time(), tstart, units = "mins"), 3))
  print(paste0("Adding Time Stamp : ", info , ", ", timeDiff, " minutes from start..."))
  saveInfo$timeStamps[[length(saveInfo$timeStamps) + 1]] <- list(
    info = info, 
    time = timeDiff
  )
  saveInfo
}

print(Sys.time())
## [1] "2020-02-27 18:15:56 PST"
#Print Input Params
print(params)
## $jobAttempt
## [1] 1
## 
## $project
## [1] "All_PBMC_Rep1"
## 
## $projectMetadata
## [1] "Project_Metadata.tsv"
## 
## $threads
## [1] 20
## 
## $out1
## [1] "/scratch/users/jgranja/BenchmarksLargePBMC/Large/outputSmall/Signac/Projects/All_PBMC_Rep1/All_PBMC_Rep1-Signac-1-DataImport-Benchmark.rds"
## 
## $out2
## [1] "/scratch/users/jgranja/BenchmarksLargePBMC/Large/outputSmall/Signac/Projects/All_PBMC_Rep1/All_PBMC_Rep1-Signac-1-DataImport-Save.rds"
#Get Project Metadata
df <- data.frame(readr::read_tsv(paste0(params$projectMetadata)))
## Parsed with column specification:
## cols(
##   Project = col_character(),
##   Metadata = col_character()
## )
#Get Sample Metadata
metadata <- data.frame(readr::read_tsv(df[paste0(df[,1]) == paste0(params$project), 2]))
## Parsed with column specification:
## cols(
##   Sample = col_character(),
##   Genome = col_character(),
##   Path = col_character()
## )
print(metadata)
##                     Sample Genome
## 1                 10k_pbmc   Hg19
## 2    frozen_sorted_pbmc_5k   Hg19
## 3            fresh_pbmc_5k   Hg19
## 4  frozen_unsorted_pbmc_5k   Hg19
## 5         pbmc_10k_nextgem   Hg19
## 6              pbmc_10k_v1   Hg19
## 7          pbmc_1k_nextgem   Hg19
## 8               pbmc_1k_v1   Hg19
## 9         pbmc_500_nextgem   Hg19
## 10             pbmc_500_v1   Hg19
## 11         pbmc_5k_nextgem   Hg19
## 12              pbmc_5k_v1   Hg19
## 13       scATAC_PBMC_D10T1   Hg19
## 14       scATAC_PBMC_D11T1   Hg19
## 15       scATAC_PBMC_D12T1   Hg19
## 16       scATAC_PBMC_D12T2   Hg19
## 17       scATAC_PBMC_D12T3   Hg19
##                                                                                                                               Path
## 1                   /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/10k_pbmc_fragments.tsv.gz
## 2      /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_sorted_pbmc_5k_fragments.tsv.gz
## 3              /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/fresh_pbmc_5k_fragments.tsv.gz
## 4    /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_unsorted_pbmc_5k_fragments.tsv.gz
## 5  /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_nextgem_fragments.tsv.gz
## 6       /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_v1_fragments.tsv.gz
## 7   /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_nextgem_fragments.tsv.gz
## 8        /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_v1_fragments.tsv.gz
## 9  /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_nextgem_fragments.tsv.gz
## 10      /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_v1_fragments.tsv.gz
## 11  /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_nextgem_fragments.tsv.gz
## 12       /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_v1_fragments.tsv.gz
## 13        /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D10T1.fragments.tsv.gz
## 14        /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D11T1.fragments.tsv.gz
## 15        /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T1.fragments.tsv.gz
## 16        /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T2.fragments.tsv.gz
## 17        /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T3.fragments.tsv.gz
#Input Files
inputFiles <- paste0(metadata$Path)
names(inputFiles) <- metadata$Sample
print(inputFiles)
##                                                                                                                          10k_pbmc 
##                  "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/10k_pbmc_fragments.tsv.gz" 
##                                                                                                             frozen_sorted_pbmc_5k 
##     "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_sorted_pbmc_5k_fragments.tsv.gz" 
##                                                                                                                     fresh_pbmc_5k 
##             "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/fresh_pbmc_5k_fragments.tsv.gz" 
##                                                                                                           frozen_unsorted_pbmc_5k 
##   "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/bcc/10x/frozen_unsorted_pbmc_5k_fragments.tsv.gz" 
##                                                                                                                  pbmc_10k_nextgem 
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_nextgem_fragments.tsv.gz" 
##                                                                                                                       pbmc_10k_v1 
##      "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_10k_v1_fragments.tsv.gz" 
##                                                                                                                   pbmc_1k_nextgem 
##  "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_nextgem_fragments.tsv.gz" 
##                                                                                                                        pbmc_1k_v1 
##       "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_1k_v1_fragments.tsv.gz" 
##                                                                                                                  pbmc_500_nextgem 
## "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_nextgem_fragments.tsv.gz" 
##                                                                                                                       pbmc_500_v1 
##      "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_500_v1_fragments.tsv.gz" 
##                                                                                                                   pbmc_5k_nextgem 
##  "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_nextgem_fragments.tsv.gz" 
##                                                                                                                        pbmc_5k_v1 
##       "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/10x_Website/atac_pbmc_5k_v1_fragments.tsv.gz" 
##                                                                                                                 scATAC_PBMC_D10T1 
##        "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D10T1.fragments.tsv.gz" 
##                                                                                                                 scATAC_PBMC_D11T1 
##        "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D11T1.fragments.tsv.gz" 
##                                                                                                                 scATAC_PBMC_D12T1 
##        "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T1.fragments.tsv.gz" 
##                                                                                                                 scATAC_PBMC_D12T2 
##        "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T2.fragments.tsv.gz" 
##                                                                                                                 scATAC_PBMC_D12T3 
##        "/oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal_GEO/scATAC_PBMC_D12T3.fragments.tsv.gz"
#Genome
genome <- tolower(paste0(metadata$Genome[1]))

#Working Directory Relative to project
owd <- getwd()
setwd(dirname(paste0(params$out1)))

#Copy from storage to faster more conistent current working directory
for(i in seq_along(inputFiles)){
  system(paste0("cp ", inputFiles[i], " ", basename(inputFiles[i])))
}
inputFiles <- basename(inputFiles)
names(inputFiles) <- metadata$Sample
print(inputFiles)
##                                   10k_pbmc 
##                "10k_pbmc_fragments.tsv.gz" 
##                      frozen_sorted_pbmc_5k 
##   "frozen_sorted_pbmc_5k_fragments.tsv.gz" 
##                              fresh_pbmc_5k 
##           "fresh_pbmc_5k_fragments.tsv.gz" 
##                    frozen_unsorted_pbmc_5k 
## "frozen_unsorted_pbmc_5k_fragments.tsv.gz" 
##                           pbmc_10k_nextgem 
##   "atac_pbmc_10k_nextgem_fragments.tsv.gz" 
##                                pbmc_10k_v1 
##        "atac_pbmc_10k_v1_fragments.tsv.gz" 
##                            pbmc_1k_nextgem 
##    "atac_pbmc_1k_nextgem_fragments.tsv.gz" 
##                                 pbmc_1k_v1 
##         "atac_pbmc_1k_v1_fragments.tsv.gz" 
##                           pbmc_500_nextgem 
##   "atac_pbmc_500_nextgem_fragments.tsv.gz" 
##                                pbmc_500_v1 
##        "atac_pbmc_500_v1_fragments.tsv.gz" 
##                            pbmc_5k_nextgem 
##    "atac_pbmc_5k_nextgem_fragments.tsv.gz" 
##                                 pbmc_5k_v1 
##         "atac_pbmc_5k_v1_fragments.tsv.gz" 
##                          scATAC_PBMC_D10T1 
##       "scATAC_PBMC_D10T1.fragments.tsv.gz" 
##                          scATAC_PBMC_D11T1 
##       "scATAC_PBMC_D11T1.fragments.tsv.gz" 
##                          scATAC_PBMC_D12T1 
##       "scATAC_PBMC_D12T1.fragments.tsv.gz" 
##                          scATAC_PBMC_D12T2 
##       "scATAC_PBMC_D12T2.fragments.tsv.gz" 
##                          scATAC_PBMC_D12T3 
##       "scATAC_PBMC_D12T3.fragments.tsv.gz"
#Remove any tbi files in case of re-run
o <- suppressWarnings(file.remove(list.files(pattern = ".tbi")))

#Start Time
saveInfo <- list(timeStamps = 
    list(
        list(info = "Start", time = 0)
    ), 
    info = list()
)

#Threads based on how many times this job was attempted (killed by using too much memory)
if(params$jobAttempt == 1){
  threads <- params$threads
}else if(params$jobAttempt == 2){
  threads <- floor(params$threads / 2)
}else{
  threads <- 1
}
print(threads)
## [1] 20
#Load R Libraries
library(Signac)
library(Seurat)
library(GenomeInfoDb)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
library(GenomicRanges)
library(ggplot2)
library(future)
## 
## Attaching package: 'future'
## The following object is masked from 'package:GenomicRanges':
## 
##     values
## The following object is masked from 'package:IRanges':
## 
##     values
## The following object is masked from 'package:S4Vectors':
## 
##     values
#This is hidden but it looks like it applies to FeatureMatrix
# NOTE : This crashed everytime I used it so we are disabling it.
#Error: Failed to retrieve the result of MulticoreFuture (future_lapply-1) from the forked worker (on 
#localhost; PID 168992). Post-mortem diagnostic: No process exists with this PID, i.e. the forked 
#localhost worker is no longer alive.
#Error: Failed to retrieve the result of MulticoreFuture (future_lapply-5) from the forked worker (on 
#localhost; PID 129250). Post-mortem diagnostic: No process exists with this PID, i.e. the forked 
#localhost worker is no longer alive.
#options(future.globals.maxSize = 32 * 1024 ^ 3) #32 GB
#plan("multiprocess", workers = threads)
#plan()

#Get Hematopoiesis Peak Set from Corces et al.
peakFile <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE74nnn/GSE74912/suppl/GSE74912_ATACseq_All_Counts.txt.gz"
features <- readr::read_tsv(peakFile)[,1:3]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Chr = col_character()
## )
## See spec(...) for full column specifications.
features <- GRanges(features$Chr, IRanges(start = features$Start, end = features$End)) 
print(features)
## GRanges object with 590650 ranges and 0 metadata columns:
##            seqnames              ranges strand
##               <Rle>           <IRanges>  <Rle>
##        [1]     chr1         10025-10525      *
##        [2]     chr1         13252-13752      *
##        [3]     chr1         16019-16519      *
##        [4]     chr1         96376-96876      *
##        [5]     chr1       115440-115940      *
##        ...      ...                 ...    ...
##   [590646]     chrX 154880741-154881241      *
##   [590647]     chrX 154891824-154892324      *
##   [590648]     chrX 154896342-154896842      *
##   [590649]     chrX 154912441-154912941      *
##   [590650]     chrX 155259860-155260360      *
##   -------
##   seqinfo: 23 sequences from an unspecified genome; no seqlengths
tstart <- Sys.time()
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("getPeaks"))
## [1] "Adding Time Stamp : getPeaks, 0 minutes from start..."
#For Computing TSS Scores Per Cell
if(!requireNamespace("EnsDb.Hsapiens.v75", quietly = TRUE))
  BiocManager::install("EnsDb.Hsapiens.v75")
library(EnsDb.Hsapiens.v75)
## Loading required package: ensembldb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: AnnotationFilter
## 
## Attaching package: 'AnnotationFilter'
## The following object is masked from 'package:future':
## 
##     value
## 
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
## 
##     filter
geneRanges <- genes(EnsDb.Hsapiens.v75)
tssRanges <- GRanges(
  seqnames = seqnames(geneRanges),
  ranges = IRanges(start = start(geneRanges), width = 2),
  strand = strand(geneRanges)
)
seqlevelsStyle(tssRanges) <- 'UCSC'
tssRanges <- keepStandardChromosomes(tssRanges, pruning.mode = 'coarse')
tssRanges1 <- tssRanges[which(seqnames(tssRanges) %in% c("chr1", "chr2", "chr3"))]
print(tssRanges1)
## GRanges object with 12614 ranges and 0 metadata columns:
##           seqnames              ranges strand
##              <Rle>           <IRanges>  <Rle>
##       [1]     chr1         11869-11870      +
##       [2]     chr1         14363-14364      -
##       [3]     chr1         29554-29555      +
##       [4]     chr1         34554-34555      -
##       [5]     chr1         52473-52474      +
##       ...      ...                 ...    ...
##   [12610]     chr3 197848259-197848260      -
##   [12611]     chr3 197879237-197879238      +
##   [12612]     chr3 197923617-197923618      +
##   [12613]     chr3 197950368-197950369      -
##   [12614]     chr3 197955065-197955066      +
##   -------
##   seqinfo: 25 sequences from an unspecified genome; no seqlengths
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("loadGenome"))
## [1] "Adding Time Stamp : loadGenome, 0.051 minutes from start..."
#Process each sample by getting a counts matrix and then TSS scores so that we can filter bad cells properly
seuratOut <- c()
for(i in seq_along(inputFiles)){

  print(paste0("Running Sample : ", names(inputFiles)[i]))

  #1. Index Tabix
  Rsamtools::indexTabix(inputFiles[i], format = "bed")
  saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("IndexTabix=", names(inputFiles)[i]))

  #2. Filter low cell # fragments
  fragsPerCell <- table(data.table::fread(inputFiles[i], sep = "\t", select = 4)$V4)
  fragsPerCell <- fragsPerCell[fragsPerCell >= 1000]
  print(paste0("nCells = ", length(fragsPerCell)))
  print(paste0("median frags per cell = ", median(fragsPerCell)))
  saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("FilterLowBC=", names(inputFiles)[i]))

  #3. Create cell x peak matrix
  countsMat <- FeatureMatrix(
    fragments = inputFiles[i],
    features = features,
    cells = names(fragsPerCell),
    verbose = TRUE
  )
  saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("CellByPeak=", names(inputFiles)[i]))

  #4. Create Seurat Object
  seuratObj <- CreateSeuratObject(
    counts = countsMat,
    assay = 'peaks',
    project = 'ATAC',
    min.cells = 1,
    min.features = 1
  )
  seuratObj$sampleName <- names(inputFiles)[i]

  #5. Set Fragments
  seuratObj <- SetFragments(
    object = seuratObj,
    file = inputFiles[i]
  )
  saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("CreateSeurat=", names(inputFiles)[i]))

  #6. TSS Scores
  #Note the tutorial does 2k TSS, when using downsampled test datasets (which I know the TSS scores and true # pass filter) 
  #this is too little to get an accurate QC. By default there will be now all TSS on chr1,chr2,chr3 being computed to ensure ok stability.
  #Im worried that this score isnt stable because its not the signal at the center of the TSS and when running it at different
  #Number of TSS I get different results. Also, this value depends on the number of reads in peaks somehow??
  seuratObj <- TSSEnrichment(object = seuratObj, tss.positions = tssRanges1)
  print(paste0("median TSS per cell = ", median(seuratObj$TSS.enrichment)))

  df <- data.frame(x = log10(seuratObj$nCount_peaks + 1), y = seuratObj$TSS.enrichment)
  p <- ggplot(df, aes(x, y)) + 
    geom_point() + theme_bw() +
    ylim(c(0, quantile(df$y, 0.99))) +
    xlim(c(quantile(df$x, 0.001), quantile(df$x, 0.99)))
  print(p)

  #Plot TSS Scores
  seuratObj$highTSS <- ifelse(seuratObj$TSS.enrichment >= 2, 'High', 'Low')
  p <- TSSPlot(seuratObj, group.by = 'highTSS') + ggtitle(paste0("TSS enrichment score : ", names(inputFiles)[i])) + NoLegend()
  print(p)

  #7. Filter Cells with TSS greater than 2 (I found this score is very unstable with 
  #low # of fragment cells but we can at least filter junk)
  seuratObj <- subset(seuratObj, subset = TSS.enrichment >= 2 & seuratObj$nCount_peaks >= 500)
  print(paste0("nCells pass filter = ", ncol(seuratObj)))

  outFile <- paste0(names(inputFiles)[i], ".rds")
  saveRDS(seuratObj, outFile)

  rm(countsMat)
  rm(seuratObj)
  gc()

  seuratOut <- c(seuratOut, outFile)

}
## [1] "Running Sample : 10k_pbmc"
## [1] "Adding Time Stamp : IndexTabix=10k_pbmc, 1.497 minutes from start..."
## [1] "nCells = 16436"
## [1] "median frags per cell = 7660.5"
## [1] "Adding Time Stamp : FilterLowBC=10k_pbmc, 4.073 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=10k_pbmc, 72.786 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=10k_pbmc, 73.196 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score
## [1] "median TSS per cell = 2.805625"
## Warning: Removed 338 rows containing missing values (geom_point).

## [1] "nCells pass filter = 9928"
## [1] "Running Sample : frozen_sorted_pbmc_5k"
## [1] "Adding Time Stamp : IndexTabix=frozen_sorted_pbmc_5k, 134.823 minutes from start..."
## [1] "nCells = 9837"
## [1] "median frags per cell = 3710"
## [1] "Adding Time Stamp : FilterLowBC=frozen_sorted_pbmc_5k, 135.62 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=frozen_sorted_pbmc_5k, 189.052 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=frozen_sorted_pbmc_5k, 189.217 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 2.814"
## Warning: Removed 207 rows containing missing values (geom_point).

## [1] "nCells pass filter = 6429"
## [1] "Running Sample : fresh_pbmc_5k"
## [1] "Adding Time Stamp : IndexTabix=fresh_pbmc_5k, 212.667 minutes from start..."
## [1] "nCells = 4966"
## [1] "median frags per cell = 7198.5"
## [1] "Adding Time Stamp : FilterLowBC=fresh_pbmc_5k, 213.355 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=fresh_pbmc_5k, 258.335 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=fresh_pbmc_5k, 258.474 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.02647285190714"
## Warning: Removed 105 rows containing missing values (geom_point).

## [1] "nCells pass filter = 4460"
## [1] "Running Sample : frozen_unsorted_pbmc_5k"
## [1] "Adding Time Stamp : IndexTabix=frozen_unsorted_pbmc_5k, 280.235 minutes from start..."
## [1] "nCells = 30433"
## [1] "median frags per cell = 1343"
## [1] "Adding Time Stamp : FilterLowBC=frozen_unsorted_pbmc_5k, 281.211 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=frozen_unsorted_pbmc_5k, 346.535 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=frozen_unsorted_pbmc_5k, 346.707 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.206"
## Warning: Removed 591 rows containing missing values (geom_point).

## [1] "nCells pass filter = 5352"
## [1] "Running Sample : pbmc_10k_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_10k_nextgem, 368.613 minutes from start..."
## [1] "nCells = 13785"
## [1] "median frags per cell = 9784"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_10k_nextgem, 370.425 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_10k_nextgem, 430.118 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_10k_nextgem, 430.466 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 2.94531999999998"
## Warning: Removed 289 rows containing missing values (geom_point).

## [1] "nCells pass filter = 10060"
## [1] "Running Sample : pbmc_10k_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_10k_v1, 483.168 minutes from start..."
## [1] "nCells = 9584"
## [1] "median frags per cell = 8861.5"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_10k_v1, 484.211 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_10k_v1, 534.619 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_10k_v1, 534.878 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.05715789473684"
## Warning: Removed 202 rows containing missing values (geom_point).

## [1] "nCells pass filter = 7700"
## [1] "Running Sample : pbmc_1k_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_1k_nextgem, 568.809 minutes from start..."
## [1] "nCells = 1102"
## [1] "median frags per cell = 14492"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_1k_nextgem, 568.979 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_1k_nextgem, 598.654 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_1k_nextgem, 598.72 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.45349140845072"
## Warning: Removed 26 rows containing missing values (geom_point).

## [1] "nCells pass filter = 1065"
## [1] "Running Sample : pbmc_1k_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_1k_v1, 607.576 minutes from start..."
## [1] "nCells = 978"
## [1] "median frags per cell = 8529"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_1k_v1, 607.694 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_1k_v1, 640.84 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_1k_v1, 640.9 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.43303655435472"
## Warning: Removed 21 rows containing missing values (geom_point).

## [1] "nCells pass filter = 889"
## [1] "Running Sample : pbmc_500_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_500_nextgem, 648.168 minutes from start..."
## [1] "nCells = 548"
## [1] "median frags per cell = 15828.5"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_500_nextgem, 648.258 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_500_nextgem, 669.875 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_500_nextgem, 669.898 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.45889810344829"
## Warning: Removed 13 rows containing missing values (geom_point).

## [1] "nCells pass filter = 527"
## [1] "Running Sample : pbmc_500_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_500_v1, 676.091 minutes from start..."
## [1] "nCells = 439"
## [1] "median frags per cell = 10959"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_500_v1, 676.163 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_500_v1, 702.295 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_500_v1, 702.314 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.44822330097086"
## Warning: Removed 11 rows containing missing values (geom_point).

## [1] "nCells pass filter = 419"
## [1] "Running Sample : pbmc_5k_nextgem"
## [1] "Adding Time Stamp : IndexTabix=pbmc_5k_nextgem, 708.436 minutes from start..."
## [1] "nCells = 5278"
## [1] "median frags per cell = 13394"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_5k_nextgem, 709.242 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_5k_nextgem, 753.752 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_5k_nextgem, 753.959 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.25727551382001"
## Warning: Removed 112 rows containing missing values (geom_point).

## [1] "nCells pass filter = 4870"
## [1] "Running Sample : pbmc_5k_v1"
## [1] "Adding Time Stamp : IndexTabix=pbmc_5k_v1, 782.63 minutes from start..."
## [1] "nCells = 4583"
## [1] "median frags per cell = 8591"
## [1] "Adding Time Stamp : FilterLowBC=pbmc_5k_v1, 783.135 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=pbmc_5k_v1, 826.121 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=pbmc_5k_v1, 826.272 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.294"
## Warning: Removed 97 rows containing missing values (geom_point).

## [1] "nCells pass filter = 4049"
## [1] "Running Sample : scATAC_PBMC_D10T1"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D10T1, 846.089 minutes from start..."
## [1] "nCells = 13618"
## [1] "median frags per cell = 1416"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D10T1, 846.972 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D10T1, 905.763 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D10T1, 905.885 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.206"
## Warning: Removed 288 rows containing missing values (geom_point).

## [1] "nCells pass filter = 3105"
## [1] "Running Sample : scATAC_PBMC_D11T1"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D11T1, 924.331 minutes from start..."
## [1] "nCells = 24945"
## [1] "median frags per cell = 1282"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D11T1, 925.083 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D11T1, 997.564 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D11T1, 997.654 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.072"
## Warning: Removed 522 rows containing missing values (geom_point).

## [1] "nCells pass filter = 2667"
## [1] "Running Sample : scATAC_PBMC_D12T1"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D12T1, 1008.652 minutes from start..."
## [1] "nCells = 2103"
## [1] "median frags per cell = 2470"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D12T1, 1008.975 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D12T1, 1047.165 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D12T1, 1047.2 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 1.6884"
## Warning: Removed 47 rows containing missing values (geom_point).

## [1] "nCells pass filter = 870"
## [1] "Running Sample : scATAC_PBMC_D12T2"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D12T2, 1055.693 minutes from start..."
## [1] "nCells = 1598"
## [1] "median frags per cell = 17165.5"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D12T2, 1055.992 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D12T2, 1093.319 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D12T2, 1093.384 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.23970151199164"
## Warning: Removed 34 rows containing missing values (geom_point).

## [1] "nCells pass filter = 1284"
## [1] "Running Sample : scATAC_PBMC_D12T3"
## [1] "Adding Time Stamp : IndexTabix=scATAC_PBMC_D12T3, 1105.421 minutes from start..."
## [1] "nCells = 2082"
## [1] "median frags per cell = 7006"
## [1] "Adding Time Stamp : FilterLowBC=scATAC_PBMC_D12T3, 1105.707 minutes from start..."
## Extracting reads overlapping genomic regions
## Constructing matrix
## [1] "Adding Time Stamp : CellByPeak=scATAC_PBMC_D12T3, 1140.511 minutes from start..."
## [1] "Adding Time Stamp : CreateSeurat=scATAC_PBMC_D12T3, 1140.597 minutes from start..."
## Finding + strand cut sites
## Finding - strand cut sites
## Computing mean insertion frequency in flanking regions
## Normalizing TSS score

## [1] "median TSS per cell = 3.20224736842107"
## Warning: Removed 44 rows containing missing values (geom_point).

## [1] "nCells pass filter = 1857"
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("Completed"))
## [1] "Adding Time Stamp : Completed, 1151.833 minutes from start..."
print(Sys.time() - tstart)
## Time difference of 19.19722 hours
#Set Original Working Directory
#Save Output
seuratOut <- file.path(getwd(), seuratOut)
setwd(owd)
saveRDS(saveInfo, params$out1)
saveRDS(seuratOut, params$out2)

#Print Session Info
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] EnsDb.Hsapiens.v75_2.99.0 ensembldb_2.10.2         
##  [3] AnnotationFilter_1.10.0   GenomicFeatures_1.38.2   
##  [5] AnnotationDbi_1.48.0      Biobase_2.46.0           
##  [7] future_1.16.0             ggplot2_3.2.1            
##  [9] GenomicRanges_1.38.0      GenomeInfoDb_1.22.0      
## [11] IRanges_2.20.2            S4Vectors_0.24.3         
## [13] BiocGenerics_0.32.0       Seurat_3.1.2             
## [15] Signac_0.2.2             
## 
## loaded via a namespace (and not attached):
##   [1] sn_1.5-5                    plyr_1.8.5                 
##   [3] igraph_1.2.4.2              lazyeval_0.2.2             
##   [5] splines_3.6.1               BiocParallel_1.20.1        
##   [7] listenv_0.8.0               TH.data_1.0-10             
##   [9] digest_0.6.23               htmltools_0.4.0            
##  [11] gdata_2.18.0                magrittr_1.5               
##  [13] memoise_1.1.0               BSgenome_1.54.0            
##  [15] cluster_2.1.0               ROCR_1.0-7                 
##  [17] ggfittext_0.8.1             globals_0.12.5             
##  [19] Biostrings_2.54.0           readr_1.3.1                
##  [21] RcppParallel_4.4.4          matrixStats_0.55.0         
##  [23] R.utils_2.9.2               sandwich_2.5-1             
##  [25] prettyunits_1.1.1           colorspace_1.4-1           
##  [27] blob_1.2.1                  rappdirs_0.3.1             
##  [29] ggrepel_0.8.1               xfun_0.12                  
##  [31] dplyr_0.8.4                 crayon_1.3.4               
##  [33] RCurl_1.98-1.1              jsonlite_1.6               
##  [35] survival_2.44-1.1           zoo_1.8-7                  
##  [37] ape_5.3                     glue_1.3.1                 
##  [39] gtable_0.3.0                zlibbioc_1.32.0            
##  [41] XVector_0.26.0              leiden_0.3.2               
##  [43] DelayedArray_0.12.2         future.apply_1.4.0         
##  [45] scales_1.1.0                mvtnorm_1.0-12             
##  [47] DBI_1.1.0                   bibtex_0.4.2.2             
##  [49] Rcpp_1.0.3                  metap_1.3                  
##  [51] plotrix_3.7-7               viridisLite_0.3.0          
##  [53] progress_1.2.2              reticulate_1.14            
##  [55] bit_1.1-15.1                rsvd_1.0.2                 
##  [57] SDMTools_1.1-221.2          tsne_0.1-3                 
##  [59] htmlwidgets_1.5.1           httr_1.4.1                 
##  [61] gplots_3.0.1.2              RColorBrewer_1.1-2         
##  [63] TFisher_0.2.0               ica_1.0-2                  
##  [65] farver_2.0.3                pkgconfig_2.0.3            
##  [67] XML_3.99-0.3                R.methodsS3_1.7.1          
##  [69] ggseqlogo_0.1               uwot_0.1.5                 
##  [71] labeling_0.3                tidyselect_1.0.0           
##  [73] rlang_0.4.4                 reshape2_1.4.3             
##  [75] munsell_0.5.0               tools_3.6.1                
##  [77] RSQLite_2.2.0               ggridges_0.5.2             
##  [79] evaluate_0.14               stringr_1.4.0              
##  [81] yaml_2.2.1                  npsurv_0.4-0               
##  [83] knitr_1.27                  bit64_0.9-7                
##  [85] fitdistrplus_1.0-14         caTools_1.18.0             
##  [87] purrr_0.3.3                 RANN_2.6.1                 
##  [89] pbapply_1.4-2               nlme_3.1-140               
##  [91] R.oo_1.23.0                 biomaRt_2.40.5             
##  [93] compiler_3.6.1              curl_4.3                   
##  [95] plotly_4.9.1                png_0.1-7                  
##  [97] lsei_1.2-0                  tibble_2.1.3               
##  [99] stringi_1.4.5               lattice_0.20-38            
## [101] ProtGenerics_1.18.0         Matrix_1.2-17              
## [103] multtest_2.42.0             vctrs_0.2.2                
## [105] mutoss_0.1-12               pillar_1.4.3               
## [107] lifecycle_0.1.0             Rdpack_0.11-1              
## [109] lmtest_0.9-37               RcppAnnoy_0.0.14           
## [111] data.table_1.12.8           cowplot_1.0.0              
## [113] bitops_1.0-6                irlba_2.3.3                
## [115] gbRd_0.4-11                 gggenes_0.4.0              
## [117] patchwork_1.0.0             rtracklayer_1.46.0         
## [119] R6_2.4.1                    KernSmooth_2.23-15         
## [121] gridExtra_2.3               codetools_0.2-16           
## [123] MASS_7.3-51.4               gtools_3.8.1               
## [125] assertthat_0.2.1            SummarizedExperiment_1.16.1
## [127] withr_2.1.2                 GenomicAlignments_1.22.1   
## [129] sctransform_0.2.1           Rsamtools_2.2.1            
## [131] mnormt_1.5-5                multcomp_1.4-12            
## [133] GenomeInfoDbData_1.2.2      hms_0.5.3                  
## [135] grid_3.6.1                  tidyr_1.0.2                
## [137] rmarkdown_2.1               Rtsne_0.15                 
## [139] numDeriv_2016.8-1.1